getwd()
## [1] "/home/collindabbieri/Documents/AppliedRegressionAnalysis/Labs/Lab7"
library(readxl)
prodqual=read_excel("../../Dataxls/Excel/PRODQUAL.XLS")
head(prodqual)
## # A tibble: 6 x 3
## TEMP PRESSURE QUALITY
## <dbl> <dbl> <dbl>
## 1 80 50 50.8
## 2 80 50 50.7
## 3 80 50 49.4
## 4 80 55 93.7
## 5 80 55 90.9
## 6 80 55 90.9
Answer Example 12.3
Fit the complete second order model to the data
quality is y temp is x1 pressure is x2
\[E(y)=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2+\beta_4x_1^2+\beta_5x_2^2\]
y=prodqual$QUALITY
x1=prodqual$TEMP
x2=prodqual$PRESSURE
TEMP2=prodqual$TEMP^2
PRESS2=prodqual$PRESSURE^2
prodmod=lm(QUALITY~TEMP+PRESSURE+TEMP:PRESSURE+TEMP2+PRESS2,data=prodqual)
summary(prodmod)
##
## Call:
## lm(formula = QUALITY ~ TEMP + PRESSURE + TEMP:PRESSURE + TEMP2 +
## PRESS2, data = prodqual)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.82593 -0.95370 0.07407 1.01852 2.95185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.128e+03 1.103e+02 -46.49 < 2e-16 ***
## TEMP 3.110e+01 1.344e+00 23.13 < 2e-16 ***
## PRESSURE 1.397e+02 3.140e+00 44.51 < 2e-16 ***
## TEMP2 -1.334e-01 6.853e-03 -19.46 6.46e-15 ***
## PRESS2 -1.144e+00 2.741e-02 -41.74 < 2e-16 ***
## TEMP:PRESSURE -1.455e-01 9.692e-03 -15.01 1.06e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.679 on 21 degrees of freedom
## Multiple R-squared: 0.993, Adjusted R-squared: 0.9913
## F-statistic: 596.3 on 5 and 21 DF, p-value: < 2.2e-16
Sketch the response surface (3D plot of model fit)
print(min(prodqual$TEMP))
## [1] 80
print(max(prodqual$TEMP))
## [1] 100
print(min(prodqual$PRESSURE))
## [1] 50
print(max(prodqual$PRESSURE))
## [1] 60
print(prodmod$coefficients)
## (Intercept) TEMP PRESSURE TEMP2 PRESS2
## -5127.8990741 31.0963889 139.7472222 -0.1333889 -1.1442222
## TEMP:PRESSURE
## -0.1455000
library(rgl)
return_fitval=function(x1,x2){
coeff=prodmod$coefficients
val=coeff[1]+coeff[2]*x1+coeff[3]*x2+coeff[6]*x1*x2+coeff[4]*x1^2+coeff[5]*x2^2
return(val)
}
x_plot=c()
y_plot=c()
z=c()
x1_pred=seq(80,100,by=0.1)
x2_pred=seq(50,60,by=0.1)
for(i in x1_pred){
for(j in x2_pred){
val=return_fitval(i,j)
x_plot=append(x_plot,i)
y_plot=append(y_plot,j)
z=append(z,val)
}
}
plot3d(x_plot,y_plot,z,type='l',xlab='x1',ylab='x2',zlab='y')
rglwidget()
Test the overall utility of the model (model summary)
summary(prodmod)
##
## Call:
## lm(formula = QUALITY ~ TEMP + PRESSURE + TEMP:PRESSURE + TEMP2 +
## PRESS2, data = prodqual)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.82593 -0.95370 0.07407 1.01852 2.95185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.128e+03 1.103e+02 -46.49 < 2e-16 ***
## TEMP 3.110e+01 1.344e+00 23.13 < 2e-16 ***
## PRESSURE 1.397e+02 3.140e+00 44.51 < 2e-16 ***
## TEMP2 -1.334e-01 6.853e-03 -19.46 6.46e-15 ***
## PRESS2 -1.144e+00 2.741e-02 -41.74 < 2e-16 ***
## TEMP:PRESSURE -1.455e-01 9.692e-03 -15.01 1.06e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.679 on 21 degrees of freedom
## Multiple R-squared: 0.993, Adjusted R-squared: 0.9913
## F-statistic: 596.3 on 5 and 21 DF, p-value: < 2.2e-16
The p value for the F test tells us that the model is adequate for explaining the data.
Reproduce example 11.11 using R
x1=c(120.0,65.0,150.0,1073.8,150.0,610.0,88.2,88.2,88.2,90.0,30.0,441.0,441.0,441.0,441.0,627.0,610.0,150.0,1089.5,125.0,120.0,65.0,150.0,150.0,150.0,150.0,610.0,90.0,30.0,441.0,441.0,441.0,441.0,627.0,610.0,30.0)
x2=c(375,750,500,2170,325,1500,399,399,399,1140,325,410,410,410,410,1525,1500,500,2170,750,375,750,500,250,500,325,1500,1140,325,410,410,410,410,1525,1500,325)
y=c(3137,3590,4526,10825,4023,7606,3748,2972,3163,4065,2048,6500,5651,6565,6387,6454,6928,4268,14791,2680,2974,1965,2566,1515,2000,2735,3698,2635,1206,3775,3120,4206,4006,3728,3211,1200)
print(length(x1))
## [1] 36
print(length(x2))
## [1] 36
print(length(y))
## [1] 36
\[E(y) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2\]
mod2=lm(y~x1+x2+x1:x2)
Test the overall utility of the model using the global F test at \(\alpha=0.05\)
summary(mod2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x1:x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2964.74 -914.17 53.46 1028.66 2839.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.013e+03 7.324e+02 4.114 0.000254 ***
## x1 3.786e+00 2.135e+00 1.774 0.085659 .
## x2 -1.529e+00 1.083e+00 -1.412 0.167689
## x1:x2 3.439e-03 1.540e-03 2.233 0.032662 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1472 on 32 degrees of freedom
## Multiple R-squared: 0.7287, Adjusted R-squared: 0.7033
## F-statistic: 28.65 on 3 and 32 DF, p-value: 3.43e-09
The p-value of the F test is significantly less than 0.05, so we can reject the NULL \(H_0:\beta_1=\beta_2=\beta_3=0\). Therefore, the model is adequate at the 95% confidence level.
Test the hypothesis (at \(\alpha=0.05\)) that the slope of the relationship between man-hours (y) and boiler capacity (x1) increases as the design pressure (x2) increases. That is, the capacity and pressure interact positively
Using the T-test, we see a one-tailed p-value of 0.0327/2=0.01635, meaning we can reject the NULL that \(\beta_3=0\) and claim, with 95% confidence, that the two interact positively.
Estimate the change in man-hours (y) for every 1-psi increase in design pressure when boiler capacity is 750.
Estimated \(x_2\) slope=\(\hat{\beta_2}+\hat{\beta_3}x_1=-1.53+0.003(750)=0.72\)
What is the Null for the global F test? \(H_0: \beta_1=\beta_2=\beta_3=0\)
Why is the alternate test in part b) one sided?
The test in part b) is one sided because we are testing for \(H_a:\beta_3>0\) instead of \(H_a:\beta_3\neq 0\)
Derive the formula in part c)
One might naively assume that the slope in \(x_2\) is just \(\hat{\beta_2}\). However, since interaction is present, the rate of change of man-hours with the design pressure depends on \(x_1\). This gives us the equation in c)
Answer question 12.24 pg 660 using the WAFER2 data
wafer=read_excel("../../Dataxls/Excel/WAFER2.xls")
head(wafer)
## # A tibble: 6 x 3
## OXIDE TIME POLYSIL
## <dbl> <dbl> <dbl>
## 1 1059 18 494
## 2 1049 35 853
## 3 1039 52 1090
## 4 1026 52 1058
## 5 1001 18 517
## 6 986 35 882
Write a complete second-order model for polysilicon thickness (y) as a function of oxide thickness (x1) and deposition time (x2)
\[E(y)=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2+\beta_4x_1^2+\beta_5x_2^2\]
Fit the model to the data. Give the least squares prediction equation
OXIDE2=wafer$OXIDE^2
TIME2=wafer$TIME^2
wafmod=lm(POLYSIL~OXIDE+TIME+OXIDE:TIME+OXIDE2+TIME2,data=wafer)
summary(wafmod)
##
## Call:
## lm(formula = POLYSIL ~ OXIDE + TIME + OXIDE:TIME + OXIDE2 + TIME2,
## data = wafer)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.413 -20.210 -2.474 13.432 91.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.104e+02 2.022e+02 1.040 0.31358
## OXIDE -4.634e-01 2.355e-01 -1.968 0.06662 .
## TIME 4.521e+01 7.273e+00 6.216 1.24e-05 ***
## OXIDE2 1.824e-04 1.140e-04 1.600 0.12914
## TIME2 -3.019e-01 9.762e-02 -3.093 0.00698 **
## OXIDE:TIME -7.509e-03 2.404e-03 -3.124 0.00654 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.84 on 16 degrees of freedom
## Multiple R-squared: 0.9849, Adjusted R-squared: 0.9802
## F-statistic: 209.2 on 5 and 16 DF, p-value: 5.506e-14
Conduct a test to determine if the quadratic terms in the model are necessary (use \(\alpha=0.05\))
Using the p-value from the T statistic, the quadratic term for the oxide appears to be unnecessary.
Conduct a test to determine if oxide thickness and deposition time interact. Use \(\alpha=0.05\)
The p-value from the T statistic for the interaction term is significantly less than 0.05. Therefore, we can say with 95% confidence that oxide thickness and deposition time interact.
Based on your results, parts c) and d), what model modifications do you recommend? Explain.
I would recommend creating a model with the quadratic oxide term removed. This term had the highest p-value of the T statistic, and is not adequate at the 95% confidence level.
Write a function mysecorder() that will take any xls data in the form of columns x1,x2 and y and perform a full MLR analysis
it needs to: Take arguments filename, alpha Produce a list containing -summary y.lm info -Coefficients (pt estimates) (1-\(\alpha\))100% ci for the betas (use ciReg() in s20x) A graph of the estimated trend paraboloid using rgl -graph should have data plotted -and the paraboloid -all axis labelled
Use function on WAFER2 and place output here
library(s20x)
mysecorder=function(filename,alpha=0.05){
data=read_excel(filename)
x1=unlist(data[,1])
x2=unlist(data[,2])
y=unlist(data[,3])
x12=x1^2
x22=x2^2
model=lm(y~x1+x2+x1:x2+x12+x22)
coeff=model$coefficients
cis=ciReg(model,conf.level=1-alpha,print.out=FALSE)
x=seq(min(x1),max(x1),length.out=40)
y=seq(min(x2),max(x2),length.out=40)
x_plot=c()
y_plot=c()
z=c()
for(i in x){
for(j in y){
val=coeff[1]+coeff[2]*x+coeff[3]*y+coeff[4]*x^2+coeff[5]*y^2+coeff[6]*x*y
z=append(z,val)
x_plot=append(x_plot,i)
y_plot=append(y_plot,j)
}
}
open3d()
mfrow3d(2,1)
plot3d(x1,x2,y)
plot3d(x_plot,y_plot,z,type='l',xlab='x1',ylab='x2',zlab='y')
return(list(summary=summary(model),coefficients=coeff,cis=cis))
}
wafmodel=mysecorder("../../Dataxls/Excel/WAFER2.xls")
rglwidget()
wafmodel
## $summary
##
## Call:
## lm(formula = y ~ x1 + x2 + x1:x2 + x12 + x22)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.413 -20.210 -2.474 13.432 91.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.104e+02 2.022e+02 1.040 0.31358
## x1 -4.634e-01 2.355e-01 -1.968 0.06662 .
## x2 4.521e+01 7.273e+00 6.216 1.24e-05 ***
## x12 1.824e-04 1.140e-04 1.600 0.12914
## x22 -3.019e-01 9.762e-02 -3.093 0.00698 **
## x1:x2 -7.509e-03 2.404e-03 -3.124 0.00654 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.84 on 16 degrees of freedom
## Multiple R-squared: 0.9849, Adjusted R-squared: 0.9802
## F-statistic: 209.2 on 5 and 16 DF, p-value: 5.506e-14
##
##
## $coefficients
## (Intercept) x1 x2 x12 x22
## 2.104132e+02 -4.634462e-01 4.521013e+01 1.824256e-04 -3.019411e-01
## x1:x2
## -7.509131e-03
##
## $cis
## 95 % C.I.lower 95 % C.I.upper
## (Intercept) -2.182835e+02 6.391098e+02
## x1 -9.626031e-01 3.571058e-02
## x2 2.979105e+01 6.062921e+01
## x12 -5.926726e-05 4.241185e-04
## x22 -5.088924e-01 -9.498981e-02
## x1:x2 -1.260452e-02 -2.413740e-03